San Diego: Crime and Housing

Author

Maxim Nikiforov

Group Seven - San Diego: Crime and Housing

This is a simple presentation of my code for this project.

Thank you to Jose Macias for his help!

# About this script
# Students, you will use this script to help you follow along the data workshop. 
# Each script is named for the city you will explore and use. This script is for San Diego, California.

# Libraries to use
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.2     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.4     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(leaflet)
library(leaflet.extras)
library(sf)
Linking to GEOS 3.13.1, GDAL 3.10.2, PROJ 9.5.1; sf_use_s2() is TRUE
library(scales)

Attaching package: 'scales'

The following object is masked from 'package:purrr':

    discard

The following object is masked from 'package:readr':

    col_factor
# Read the csv in and generate new columns
sd_crime <- read_csv("sd_crime.csv") 
Rows: 76990 Columns: 25
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (16): nibrs_uniq, code_section, group_type, ibr_category, crime_against...
dbl   (8): case_number, violent_crime, property_crime, beat, service_area, d...
dttm  (1): occured_on

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
sd_crime <- sd_crime %>%
  mutate(
    GEOID = as.character(GEOID),
    crime_type = as_factor(pd_offense_category),
    date_time = ymd_hms(occured_on),  # or use parse_date_time() if needed
    year = year(date_time),
    month = month(date_time),
    weekday = wday(date_time, label = TRUE),
    season = as_factor(case_when(
      month %in% 3:5 ~ "Spring",
      month %in% 6:8 ~ "Summer",
      month %in% 9:11 ~ "Fall",
      month %in% c(12, 1, 2) ~ "Winter"
    ))
  )
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `date_time = ymd_hms(occured_on)`.
Caused by warning:
!  7 failed to parse.
# Read in affordable housing csv
sd_afh <- read_csv("sd_afh.csv") %>%
  mutate(GEOID = as.character(GEOID))
Rows: 399 Columns: 12
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (8): PROPERTY_NAME, ADDRESS, GROUP_SERVICED, GEOID, Block, Tract, County...
dbl (4): TOTAL_UNITS, AFFORDABLE_UNITS, Long, Lat

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Read in San Diego shapefile for tracts
sd_tracts <- st_read("sd_tracts.shp")
Reading layer `sd_tracts' from data source 
  `C:\Users\harol\Documents\sd_tracts.shp' using driver `ESRI Shapefile'
Simple feature collection with 1040 features and 6 fields
Geometry type: POLYGON
Dimension:     XY
Bounding box:  xmin: -117.2822 ymin: 32.53444 xmax: -116.423 ymax: 33.16414
Geodetic CRS:  WGS 84
sd_tracts %>% mutate(GEOID = as.character(GEOID))
Simple feature collection with 1040 features and 6 fields
Geometry type: POLYGON
Dimension:     XY
Bounding box:  xmin: -117.2822 ymin: 32.53444 xmax: -116.423 ymax: 33.16414
Geodetic CRS:  WGS 84
First 10 features:
          GEOID         Block               Tract           County      State
1  060730073032 Block Group 2  Census Tract 73.03 San Diego County California
2  060730083551 Block Group 1  Census Tract 83.55 San Diego County California
3  060730214021 Block Group 1 Census Tract 214.02 San Diego County California
4  060730075013 Block Group 3  Census Tract 75.01 San Diego County California
5  060730054011 Block Group 1  Census Tract 54.01 San Diego County California
6  060730039013 Block Group 3  Census Tract 39.01 San Diego County California
7  060730083272 Block Group 2  Census Tract 83.27 San Diego County California
8  060730045012 Block Group 2  Census Tract 45.01 San Diego County California
9  060730083433 Block Group 3  Census Tract 83.43 San Diego County California
10 060730093053 Block Group 3  Census Tract 93.05 San Diego County California
   hshld__                       geometry
1    93030 POLYGON ((-117.2551 32.7414...
2   114821 POLYGON ((-117.1357 32.9242...
3    83409 POLYGON ((-117.2306 32.7372...
4    58808 POLYGON ((-117.2471 32.7514...
5       NA POLYGON ((-117.161 32.70938...
6    43043 POLYGON ((-117.1339 32.7032...
7   172109 POLYGON ((-117.2344 32.9642...
8    79844 POLYGON ((-117.1403 32.7194...
9    72014 POLYGON ((-117.2165 32.8664...
10  142173 POLYGON ((-117.1335 32.7858...

Area 3: Crime by Season and Type

seasonal_crime  <- function(dateframe, season_type){
  dateframe %>%
    filter(!is.na(season),
           season == season_type) %>%
    group_by(season, crime_type) %>%
    summarise(total_crime = n()) %>%
    arrange(desc(total_crime)) %>%
    head(15)
}

fall <- seasonal_crime(sd_crime, "Fall")
`summarise()` has grouped output by 'season'. You can override using the
`.groups` argument.
winter <- seasonal_crime(sd_crime, "Winter")
`summarise()` has grouped output by 'season'. You can override using the
`.groups` argument.
spring <- seasonal_crime(sd_crime, "Spring")
`summarise()` has grouped output by 'season'. You can override using the
`.groups` argument.
summer <- seasonal_crime(sd_crime, "Summer")
`summarise()` has grouped output by 'season'. You can override using the
`.groups` argument.
full_join(winter, fall) %>%
  full_join(spring) %>%
  full_join(summer) %>%
  ggplot(aes(x = crime_type, y = total_crime)) +
  geom_col() +
  labs(title = "Figure 4: Crime in San Diego by Season, 2020-2024", x = "Season", y = "Total Crimes") +
  theme_minimal() +
  coord_flip() +
  facet_wrap(~season, scales = "free")
Joining with `by = join_by(season, crime_type, total_crime)`
Joining with `by = join_by(season, crime_type, total_crime)`
Joining with `by = join_by(season, crime_type, total_crime)`

Area 4: Crime Distribution by Geography

Crime by County

sd_crime %>%
  filter(!is.na(County)) %>%
  group_by(County) %>%
  summarise(total_crime = n()) %>%
  ggplot(aes(x = County, y = total_crime)) +
    geom_col(fill = "steelblue") +
    labs(title = "Figure 5: Crime in San Diego by County, 2020–2024", x = "County", y = "Total Crimes") +
    theme_minimal()

Crime by Block

sd_crime %>%
  group_by(Block) %>%
  summarise(total_crime = n()) %>%
  ggplot(aes(x = Block, y = total_crime)) +
    geom_col(fill = "steelblue") +
    labs(title = "Figure 6: Crime in San Diego by Block, 2020–2024", x = "Block", y = "Total Crimes") +
    theme_minimal()

Crime by Neighborhood

sd_crime %>%
  filter(!is.na(neighborhood)) %>%
  group_by(neighborhood) %>%
  summarise(total_crime = n()) %>%
  ggplot(aes(x = neighborhood, y = total_crime)) +
    geom_col(fill = "steelblue") +
    labs(title = "Figure 7: Crime in San Diego by Neighborhood, 2020–2024", x = "Neighborhood", y = "Total Crimes") +
    theme_minimal()

Area 5: When Do Crimes Occur?

Crime by Weekday

sd_crime %>%
  filter(!is.na(weekday)) %>%
  group_by(weekday) %>%
  summarise(total_crime = n()) %>%
  ggplot(aes(x = weekday, y = total_crime)) +
    geom_col(fill = "steelblue") +
    labs(title = "Figure 8: Crime in San Diego by Weekday, 2020–2024", x = "Weekday", y = "Total Crimes") +
    theme_minimal()

Crime by Hour of the Day

sd_crime %>%
  mutate(hour_local = hour(with_tz(date_time, tzone = "America/Chicago"))) %>%
  group_by(hour_local) %>%
  summarise(total_crime = n(), .groups = "drop") %>%
  ggplot(aes(x = hour_local, y = total_crime)) +
    geom_col(fill = "steelblue") +
    labs(title = "Figure 9: Crime in San Diego by Hour, 2020–2024", x = "Hour of the Day", y = "Total Crimes") +
    theme_minimal()
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_col()`).

Phase 2: Affordable Housing and Crime

Task 2: Crime and Affordable Housing Analysis

Total Affordable Housing Units in San Diego

sd_afh %>% 
  filter(!is.na(PROPERTY_NAME)) %>% 
  summarise(total_units = sum(TOTAL_UNITS, na.rm = TRUE))
# A tibble: 1 × 1
  total_units
        <dbl>
1       19010

Affordable Housing by Block

sd_afh %>%
  filter(!is.na(PROPERTY_NAME)) %>%
  group_by(Block) %>%
  summarise(TOTAL_UNITS = sum(TOTAL_UNITS, na.rm =TRUE)) %>%
  ggplot(aes(x = Block, y = TOTAL_UNITS)) +
    geom_col(fill = "steelblue") +
    labs(title = "Figure 10: Affordable Housing in San Diego by Block, 2020–2024", x = "Block Group", y = "Total Units") +
    theme_minimal()

Phase 3: Mapping Crime, Affordable Housing & Income

Map of Crime, Housing, and Income

# Task 3: From you analysis, lets visualize crime, affordable housing and income distribution to understand further what is going on
# Area 1) Load leaflet and city tracts
# library(leaflet)
# library(leaflet.extras)
# library(sf)
top8 <- as.character(sd_crime %>%
                       group_by(crime_type) %>%
                       summarize(total = n()) %>%
                       arrange(desc(total)) %>%
                       head(8) %>%
                       pull(crime_type))
# defining colors according to the top eight crimes
pal <- colorFactor(
  palette ='Dark2'
  ,
  domain = top8
)
# defining a color pallet for the income variables
pal_income <- colorBin("RdYlBu", domain = sd_tracts$hshld__, bins = 10)

# Create a new palette function using colorNumeric and a Brewer palette
pal_income <- colorNumeric(
  palette = "YlGnBu",      # Choose from RColorBrewer or use custom
  domain = sd_tracts$hshld__, 
  na.color = "transparent"
)

Visualising Crime Locations

# Area 2) Pick a year to analyze and create a map where you plot crime locations using the latitude and longitude, select a subset of crimes to visualize otherwise, you might find your map hard communicate information. 

sd_crime %>%
  filter(crime_type %in% top8,
         year == 2021) %>%
  leaflet() %>% 
  addProviderTiles("CartoDB") %>% 
  addPolygons(data = sd_tracts,
              weight = 2,
              opacity = 0.2,
              dashArray = '3',
              color = "black",
              fillColor = ~pal_income(hshld__),
              label = ~paste0("Median Income: ", dollar(hshld__)),
              popup = ~paste0("Census Tract: ", Tract, "<br>",
                              "Median Income: ", dollar(hshld__)),
              fillOpacity = 0.7,
              highlightOptions = highlightOptions(
                weight = 3,
                color = "#666",
                fillOpacity = 0.9),
              ,
              group = "Income Distribution"
  ) %>%
  addCircleMarkers(lng = ~lng, lat = ~lat,
                   radius = 2,
                   color = ~ pal(crime_type),
                   label = ~ crime_type) %>%
  addCircleMarkers(
    data = sd_afh,
    lng = ~Long,
    lat = ~Lat,
    radius = ~sqrt(TOTAL_UNITS)/2,
    color = "#00CCFF",
    label = ~PROPERTY_NAME) %>%
  addLegend(
    pal = pal, values = top8, 
    title = "Crime Type",
    opacity = 1, 
    position = "bottomright"
  ) %>%
  addLegend(
    data = sd_tracts,
    pal = pal_income, values = ~hshld__,
    opacity = 0.7, title = "Income Distribution",
    position = "bottomright"
  )
# make a heatmap
sd_crime %>%
  filter(crime_type %in% top8, year == 2021) %>%
  leaflet() %>%
  addProviderTiles("CartoDB") %>%
  addCircleMarkers(
    data = sd_afh,
    lng = ~Long,
    lat = ~Lat,
    radius = ~sqrt(TOTAL_UNITS)/2,
    color = "black",
    label = ~PROPERTY_NAME,
    group = "Affordable Housing") %>%
  addHeatmap(lng = ~lng, lat = ~lat,
             blur = 20, max = 0.05, radius = 15,
             group = "Heat Map"
  ) %>%
  addLayersControl(
    overlayGroups = c("Affordable Housing","Heat Map"),
    options = layersControlOptions(collapsed = TRUE)
  )

Heatmap of Crime Locations

sd_crime %>%
  filter(crime_type %in% top8, year == 2021) %>%
  leaflet() %>%
  addProviderTiles("CartoDB") %>%
  addCircleMarkers(
    data = sd_afh,
    lng = ~Long,
    lat = ~Lat,
    radius = ~sqrt(TOTAL_UNITS)/2,
    color = "black",
    label = ~PROPERTY_NAME,
    group = "Affordable Housing") %>%
  addHeatmap(lng = ~lng, lat = ~lat,
             blur = 20, max = 0.05, radius = 15,
             group = "Heat Map"
  ) %>%
  addLayersControl(
    overlayGroups = c("Affordable Housing", "Heat Map"),
    options = layersControlOptions(collapsed = TRUE)
  )

Clustered Crime Map

sd_crime %>%
  filter(crime_type %in% top8, year == 2021) %>%
  leaflet() %>%
  addProviderTiles("CartoDB") %>%
  addPolygons(data = sd_tracts,
              weight = 2,
              opacity = 0.2,
              dashArray = '3',
              color = "black",
              fillColor = ~pal_income(hshld__),
              label = ~paste0("Median Income: ", dollar(hshld__)),
              popup = ~paste0("Census Tract: ", Tract, "<br>",
                              "Median Income: ", dollar(hshld__)),
              fillOpacity = 0.7,
              highlightOptions = highlightOptions(
                weight = 3,
                color = "#666",
                fillOpacity = 0.9),
              group = "Income Distribution"
  ) %>%
  addMarkers(
    clusterOptions = markerClusterOptions(),
    lng = ~lng, lat = ~lat,
    popup = ~crime_type
  ) %>%
  addCircleMarkers(
    data = sd_afh,
    lng = ~Long,
    lat = ~Lat,
    radius = ~sqrt(TOTAL_UNITS)/2,
    color = "#fa9fb5",
    label = ~PROPERTY_NAME) %>%
  addLegend(
    data = sd_tracts,
    pal = pal_income, values = ~hshld__,
    opacity = 0.7, title = "Income Distribution",
    position = "bottomright"
  )

Phase 4: Joining Crime Data with Census Tracts

Area 1: Group Crime Data by Census Tract

total_crime <- 
  sd_crime %>%
  filter(year == 2021) %>%
  group_by(GEOID, Block, Tract, County, State) %>%
  summarise(total_crime = n()) %>%
  ungroup() %>%
  arrange(desc(total_crime))
`summarise()` has grouped output by 'GEOID', 'Block', 'Tract', 'County'. You
can override using the `.groups` argument.

Area 2) Join the affordable housing data into the dataframe with crime and census tracts

total_afh <- sd_afh %>%
  group_by(GEOID,Block,Tract,County,State) %>%
  summarise(total_housing = n(),
            total_units = sum(TOTAL_UNITS)) %>%
  ungroup()
`summarise()` has grouped output by 'GEOID', 'Block', 'Tract', 'County'. You
can override using the `.groups` argument.
sd_tracts_crime_housing <-
  sd_tracts %>%
  left_join(total_afh, join_by(GEOID,Block,Tract,County,State)) %>%
  left_join(total_crime,join_by(GEOID,Block,Tract,County,State)) %>%
  mutate(total_units = replace_na(total_units,0),
         total_housing = replace_na(total_housing,0),
  ) %>% st_set_geometry(NULL)

sd_tracts_crime_housing %>%
  select(
    Block, Tract, total_housing, total_units, total_crime, hshld__
  ) %>%
  arrange(desc(
    total_crime
  )) %>%
  head(10)
           Block               Tract total_housing total_units total_crime
1  Block Group 1     Census Tract 65             2           0         468
2  Block Group 2 Census Tract 100.15             0           0         259
3  Block Group 1  Census Tract 89.02             0           0         257
4  Block Group 2  Census Tract 85.11             0           0         220
5  Block Group 1  Census Tract 53.02             5           0         211
6  Block Group 2  Census Tract 51.01             2         126         198
7  Block Group 1  Census Tract 79.10             1           0         190
8  Block Group 4  Census Tract 93.07             0           0         187
9  Block Group 3  Census Tract 51.02             1           0         171
10 Block Group 3  Census Tract 89.01             0           0         168
   hshld__
1    39145
2    82143
3    91727
4    83750
5    11628
6    54684
7    85799
8   137783
9    89297
10   82727